# Step 1: Load packages
library(tidyverse)
library(lubridate)
library(httpuv)
library(leaflet)
#Step 2: Load the data
# Download the Wichita stop records into a tibble. The first column of the
# file has no header, so readr fills in a name for it (see the warning below).
stops <- read_csv(
  file = "https://datajournalism.tech/wp-content/uploads/2019/10/wichita.csv"
)
## Warning: Missing column names filled in: 'X1' [1]
# Step 3: Define fixed reference values
# Population count per race category. The labels match the values found in
# `stops$subject_race`, so the two tables can be joined on that column.
population_2016 <- tribble(
  ~subject_race,            ~num_people,
  "asian/pacific islander",       19294,
  "black",                        42485,
  "hispanic",                     65090,
  "other/unknown",                16686,
  "white",                       245499
)

# Latitude / longitude pair, presumably a map center.
# NOTE(review): not referenced anywhere else in the visible script.
center_lat <- 37.689811
center_lng <- -97.332332
#Step 4: Examine the data
# Print the structure of `stops`: each column's type and its first few values.
str(stops)
## Classes 'spec_tbl_df', 'tbl_df', 'tbl' and 'data.frame': 57750 obs. of 22 variables:
## $ X1 : num 1 2 3 4 5 6 7 8 9 10 ...
## $ raw_row_number : chr "923578" "923657" "912091" "923680" ...
## $ date : Date, format: "2016-01-01" "2016-01-01" ...
## $ time : 'hms' num 18:00:00 18:08:00 18:11:00 18:13:00 ...
## ..- attr(*, "units")= chr "secs"
## $ location : chr "N WEST ST, KS, 67205" "8000 W 13TH ST N, WICHITA, KS, 67212" "500 S LIMUEL ST, WICHITA, KS, 67235" "7600 W 21ST ST N, WICHITA, KS, 67205" ...
## $ lat : num 37.7 37.7 37.7 37.7 37.7 ...
## $ lng : num -97.4 -97.4 -97.5 -97.4 -97.4 ...
## $ subject_age : num 16 44 20 21 28 27 15 20 23 NA ...
## $ subject_race : chr "white" "white" "white" "hispanic" ...
## $ subject_sex : chr "female" "male" "male" "female" ...
## $ type : chr "vehicular" "vehicular" "vehicular" "vehicular" ...
## $ disposition : chr "DISMISSED" "GUILTY (IVR)" "DISMISSED WITH PREJUDICE; DISMISSED WITH PREJUDICE" "GUILTY" ...
## $ violation : chr "RUN STOP SIGN" "SPEED OVER LIMIT" "DUI; INATTENTIVE DRIVING" "SPEED OVER LIMIT" ...
## $ citation_issued : logi TRUE TRUE TRUE TRUE TRUE TRUE ...
## $ outcome : chr "citation" "citation" "citation" "citation" ...
## $ posted_speed : num NA 40 NA 40 40 40 NA NA NA NA ...
## $ vehicle_color : chr "BURGUNDY OR MAROON" "\"ALUMINUM, SILVER\"" "WHITE" "\"ALUMINUM, SILVER\"" ...
## $ vehicle_make : chr "JEEP (1989 TO PRESENT)" "HYUNDAI" "HONDA" "TOYOTA" ...
## $ vehicle_model : chr NA "TUCSON" NA NA ...
## $ vehicle_year : num 2008 NA NA NA NA ...
## $ raw_defendant_race : chr "W" "W" "W" "W" ...
## $ raw_defendant_ethnicity: chr "N" "N" "N" "H" ...
## - attr(*, "spec")=
## .. cols(
## .. X1 = col_double(),
## .. raw_row_number = col_character(),
## .. date = col_date(format = ""),
## .. time = col_time(format = ""),
## .. location = col_character(),
## .. lat = col_double(),
## .. lng = col_double(),
## .. subject_age = col_double(),
## .. subject_race = col_character(),
## .. subject_sex = col_character(),
## .. type = col_character(),
## .. disposition = col_character(),
## .. violation = col_character(),
## .. citation_issued = col_logical(),
## .. outcome = col_character(),
## .. posted_speed = col_double(),
## .. vehicle_color = col_character(),
## .. vehicle_make = col_character(),
## .. vehicle_model = col_character(),
## .. vehicle_year = col_double(),
## .. raw_defendant_race = col_character(),
## .. raw_defendant_ethnicity = col_character()
## .. )
# Per-column summary: range, quartiles and NA counts for numeric and date
# columns; length and class for character columns.
summary(stops)
## X1 raw_row_number date time
## Min. : 1 Length:57750 Min. :2016-01-01 Length:57750
## 1st Qu.:14438 Class :character 1st Qu.:2016-03-16 Class1:hms
## Median :28876 Mode :character Median :2016-05-29 Class2:difftime
## Mean :28876 Mean :2016-06-10 Mode :numeric
## 3rd Qu.:43313 3rd Qu.:2016-08-31
## Max. :57750 Max. :2016-12-31
##
## location lat lng subject_age
## Length:57750 Min. :37.47 Min. :-101.36 Min. :11.00
## Class :character 1st Qu.:37.67 1st Qu.: -97.37 1st Qu.:24.00
## Mode :character Median :37.69 Median : -97.34 Median :33.00
## Mean :37.69 Mean : -97.33 Mean :36.71
## 3rd Qu.:37.70 3rd Qu.: -97.28 3rd Qu.:48.00
## Max. :38.48 Max. : -96.75 Max. :99.00
## NA's :1167 NA's :1167 NA's :10128
## subject_race subject_sex type
## Length:57750 Length:57750 Length:57750
## Class :character Class :character Class :character
## Mode :character Mode :character Mode :character
##
##
##
##
## disposition violation citation_issued outcome
## Length:57750 Length:57750 Mode:logical Length:57750
## Class :character Class :character TRUE:57750 Class :character
## Mode :character Mode :character Mode :character
##
##
##
##
## posted_speed vehicle_color vehicle_make vehicle_model
## Min. : 20.00 Length:57750 Length:57750 Length:57750
## 1st Qu.: 30.00 Class :character Class :character Class :character
## Median : 40.00 Mode :character Mode :character Mode :character
## Mean : 39.93
## 3rd Qu.: 40.00
## Max. :304.00
## NA's :35149
## vehicle_year raw_defendant_race raw_defendant_ethnicity
## Min. :1962 Length:57750 Length:57750
## 1st Qu.:2001 Class :character Class :character
## Median :2005 Mode :character Mode :character
## Mean :2005
## 3rd Qu.:2009
## Max. :2999
## NA's :43236
# List the column names of `stops`.
colnames(stops)
## [1] "X1" "raw_row_number"
## [3] "date" "time"
## [5] "location" "lat"
## [7] "lng" "subject_age"
## [9] "subject_race" "subject_sex"
## [11] "type" "disposition"
## [13] "violation" "citation_issued"
## [15] "outcome" "posted_speed"
## [17] "vehicle_color" "vehicle_make"
## [19] "vehicle_model" "vehicle_year"
## [21] "raw_defendant_race" "raw_defendant_ethnicity"
# Total number of rows in `stops`.
nrow(stops)
## [1] 57750
#Step 5: Count stops by race
# Tally the number of rows in `stops` for each value of `subject_race`.
count(stops, subject_race)
## # A tibble: 5 x 2
## subject_race n
## <chr> <int>
## 1 asian/pacific islander 1607
## 2 black 8038
## 3 hispanic 6709
## 4 other/unknown 9335
## 5 white 32061
# Number of stops per race category and each category's share of all stops.
# `sum(n)` equals the total row count, so this produces the same table as
# dividing by the row count of `stops`, but without relying on the magrittr
# `.` placeholder inside a grouped summary. It also matches the
# `x / sum(x)` form used for the population shares.
stops %>%
  count(subject_race) %>%
  mutate(prop = n / sum(n))
## # A tibble: 5 x 3
## subject_race n prop
## <chr> <int> <dbl>
## 1 asian/pacific islander 1607 0.0278
## 2 black 8038 0.139
## 3 hispanic 6709 0.116
## 4 other/unknown 9335 0.162
## 5 white 32061 0.555
#Step 6: Compare to total population
# Each race category's share of the summed `num_people` column.
mutate(population_2016, prop = num_people / sum(num_people))
## # A tibble: 5 x 3
## subject_race num_people prop
## <chr> <dbl> <dbl>
## 1 asian/pacific islander 19294 0.0496
## 2 black 42485 0.109
## 3 hispanic 65090 0.167
## 4 other/unknown 16686 0.0429
## 5 white 245499 0.631
#Step 7: Join data
# Stop rate per race category: stop counts are joined to the population table
# on the race label, then divided by that category's population count.
race <- count(stops, subject_race) %>%
  left_join(population_2016, by = "subject_race") %>%
  mutate(stop_rate = n / num_people)
#Step 8: Data visualization, bar chart
# Strongly prefer fixed over scientific notation when numbers are printed
# (this is a session-wide option and stays set after the chart is drawn).
options(scipen = 10000)

# Horizontal bar chart of the stop rate per race category, with the bars
# ordered by stop rate and a reference line at zero.
bar <- ggplot(
  race,
  aes(x = reorder(subject_race, stop_rate), y = stop_rate)
) +
  geom_col(fill = "blue") +
  geom_hline(yintercept = 0) +
  labs(title = "Stopped Drivers by Race", subtitle = "Wichita, KS 2016") +
  coord_flip()
bar
#Step 9: Data visualization, map
# Color lookup for the map markers. With `ordered = TRUE` the labels in
# `domain` keep the order given here, so each label lines up with the color
# at the same position in the palette.
# Stored as `race_pal` so it does not overwrite the `race` stop-rate table
# that the bar chart is built from.
race_pal <- colorFactor(
  palette = c("pink", "black", "yellow", "red", "blue"),
  domain = c(
    "white", "black", "asian/pacific islander", "hispanic", "other/unknown"
  ),
  ordered = TRUE
)

# Markers need coordinates, so drop rows with a missing latitude or longitude.
ac <- stops %>% drop_na(lat, lng)

# Interactive map with one filled circle per remaining row, colored by
# `subject_race`; the popup text reports the driver's race and sex.
# NOTE(review): the view is centered on hard-coded coordinates rather than on
# `center_lat` / `center_lng` defined earlier - confirm which is intended.
map <- leaflet(ac) %>%
  addProviderTiles(providers$CartoDB) %>%
  setView(lng = -97.318689, lat = 37.683568, zoom = 16) %>%
  addCircleMarkers(
    lng = ~lng,
    lat = ~lat,
    popup = ~paste("This is a/an", subject_race, "and", subject_sex, "driver."),
    weight = 3,
    radius = 4,
    color = ~race_pal(subject_race),
    # Spelled out because `F` is an ordinary variable that can be reassigned.
    stroke = FALSE,
    fillOpacity = 1
  )
map